;; This file is part of scheme-GNUnet. -*- coding: utf-8 -*-
;; Copyright (C) 2021 GNUnet e.V.
;;
;; scheme-GNUnet is free software: you can redistribute it and/or modify it
;; under the terms of the GNU Affero General Public License as published
;; by the Free Software Foundation, either version 3 of the License,
;; or (at your option) any later version.
;;
;; scheme-GNUnet is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Affero General Public License for more details.
;;
;; You should have received a copy of the GNU Affero General Public License
;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
;;
;; SPDX-License-Identifier: AGPL-3.0-or-later

(use-modules (web form)
	     (rnrs bytevectors)
	     (srfi srfi-64))

(define (urlencoded-string->alist string)
  (urlencoded->alist (string->utf8 string)))

(define-syntax-rule (test-decode name from to)
  (test-equal name (list to) (list (urlencoded-string->alist from))))

(test-begin "w-www-form-urlencoded")

(test-decode "empty list" "" '())
(test-decode "one field" "x=y" '(("x" . "y")))
(test-decode "two fields" "x=y&z=w" '(("x" . "y") ("z" . "w")))
(test-decode "spaces" "x+x+x=z+z+z" '(("x x x" . "z z z")))
(test-decode "forgot to encode spaces" "x x x=z z z" #f)
(test-decode "%-encoding" "x%01%02=x%03z" '(("x\x01\x02" . "x\x03z")))
(test-decode "%-encoding (NULL)" "%00x%01%02=x%03z" '(("\x00x\x01\x02" . "x\x03z")))
(test-decode "= in keys and values" "x%3Dz=0%3D1" '(("x=z" . "0=1")))

(test-decode "zero-length values" "x=&y=" '(("x" . "") ("y" . "")))
(test-decode "zero-length keys" "=z" '(("" . "z")))

;; IceCat 78.14.0 (a Firefox derivative) doesn't encode - and _, even though they should
;; be according to RFC 1866.
(test-decode "Firefox compatibility" "x-yz_w=0-12_3" '(("x-yz_w" . "0-12_3")))
(test-decode "Correct %-encoding of - and _" "%5F=%2D" '(("_" . "-")))

;; The specification uses uppercase letters.
(test-decode "no lowercase % (0)" "%aA=0" #false)
(test-decode "no lowercase % (1)" "%Aa=0" #false)

(test-decode "no %-encoding of A" "%41=0" #false)
(test-decode "no %-encoding of Z" "%5A=0" #false)
(test-decode "no %-encoding of a" "%61=0" #false)
(test-decode "no %-encoding of z" "%7A=0" #false)
(test-decode "no %-encoding of 0" "%30=0" #false)
(test-decode "no %-encoding of 9" "%39=0" #false)

;; While it might not be advisable, RFC 1866 does not forbid duplicate
;; field names.
(test-decode "duplicate field names" "field=value&field=value2"
	     '(("field" . "value") ("field" . "value2")))

(test-decode "leading &" "&oop=s" #false)
(test-decode "trailing &" "oop=s&" #false)
(test-decode "duplicated &" "o=o&&p=s" #false)
(test-decode "duplicated =" "oo==ps" #false)
(test-decode "too many =" "o=o=ps" #false)

;; RFC 1866 doesn't specify any character encoding, so assume UTF-8.
(define unicode-input "%C3%A9=%F0%9F%AA%82")
(define unicode-output '(("é" . "🪂")))
(test-decode "non-ASCII" unicode-input unicode-output)
(test-decode "bogus UTF-8" "%ED%9F%C0=z" #f)

(define (test-decode-with-encoding encoding)
  (parameterize (((fluid->parameter %default-port-encoding) encoding))
    (test-decode (string-append "non-ASCII, with " encoding
				" default port encoding")
		 unicode-input unicode-output)))

;; 'unescape' calls 'call-with-output-bytevector' without explicitely setting
;; the port encoding appropriately
(test-decode-with-encoding "UTF-8")
(test-decode-with-encoding "ISO-88519") ; doesn't support Unicode
(test-decode-with-encoding "UTF-16") ; two to four bytes per character
(test-decode-with-encoding "EBCDIC") ; non-ASCII compatible, doesn't support Unicode

(test-decode "non-ASCII input" "é=é" #f)
(test-assert "bogus UTF-8 (before decoding)"
  (not (urlencoded->alist #vu8(237 159 192 61 49))))

(test-end "w-www-form-urlencoded")
